#004 - Import Data for Multiple Assets¶

On this code we import data from YFinance, export and read a CSV file, then perform some analysis and organize the DataFrame.

From #001;

Import libraries

  • Link to Pandas Documentation: https://pandas.pydata.org/docs/index.html

  • Link to datareader Documentation: https://pandas-datareader.readthedocs.io/en/latest/index.html

  • Link to Yf Documentation: https://pandas-datareader.readthedocs.io/en/latest/readers/yahoo.html

  • Link to Plotly Documentation: https://plotly.com/python/

In [1]:
#    !pip install pandas
#    !pip install pandas-datareader
#    !pip install numpy
#    !pip install yfinance
#    !pip install datetime
#    !pip install plotly_express
In [2]:
#import Libraries
import pandas as pd
from pandas_datareader import data as pdr
import numpy as np
import yfinance as yf
import datetime as dt
import plotly.express as px
import plotly.graph_objects as go

4.1 Define a portfolio, obtain and export data¶

In [3]:
# Code to obtain stock data using Pandas Datareader
# Select the start/end dates and ticker symbols
# Data is saved in a stock_data.csv file

tickers = [item for item in input("Enter the stock tickers, for portfolio (space them only) : ").split()]

file_name = "Portfolio.csv"

yf.pdr_override()
Enter the stock tickers, for portfolio (space them only) : PETR4.SA VALE3.SA TAEE11.SA ITSA4.SA WEGE3.SA
In [4]:
# Define the start and end dates, last 5 years
end = dt.datetime.now()
start = end - dt.timedelta(days = 365*5)
In [5]:
#obtain data from Yahoo Finance
df = pdr.get_data_yahoo(tickers, start = start, end = end)
df
[*********************100%%**********************]  5 of 5 completed
Out[5]:
Adj Close Close ... Open Volume
ITSA4.SA PETR4.SA TAEE11.SA VALE3.SA WEGE3.SA ITSA4.SA PETR4.SA TAEE11.SA VALE3.SA WEGE3.SA ... ITSA4.SA PETR4.SA TAEE11.SA VALE3.SA WEGE3.SA ITSA4.SA PETR4.SA TAEE11.SA VALE3.SA WEGE3.SA
Date
2018-12-10 7.291458 8.071823 13.925715 34.242962 8.025093 9.647919 23.440001 22.260000 50.200001 8.605000 ... 9.951726 24.770000 22.309999 51.119999 8.870000 26640254 73871100 1447800.0 14331300 4977400
2018-12-11 7.316282 8.020169 14.294817 34.515808 8.113692 9.680763 23.290001 22.850000 50.599998 8.700000 ... 9.779295 23.920000 22.500000 51.150002 8.720000 35347603 64017600 1776700.0 19443900 6163200
2018-12-12 7.390743 8.023611 14.745242 34.358921 8.067065 9.779295 23.299999 23.570000 50.369999 8.650000 ... 9.730029 23.740000 22.990000 51.150002 8.735000 29719052 63296400 3483500.0 15808900 8927800
2018-12-13 7.508647 8.044274 14.832824 34.515808 8.099705 9.935304 23.360001 23.709999 50.599998 8.685000 ... 9.803928 23.290001 23.600000 50.849998 8.655000 19235305 61946100 2280200.0 15149000 7142600
2018-12-14 7.558293 7.937523 15.089318 34.706806 8.090384 10.000992 23.049999 24.120001 50.880001 8.675000 ... 9.853194 23.080000 23.740000 50.380001 8.620000 18840712 45231700 1890800.0 16910700 7909000
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
2023-12-04 9.740000 34.910000 36.419998 73.529999 34.759998 9.740000 34.910000 36.419998 73.529999 34.759998 ... 9.740000 35.439999 36.490002 74.239998 34.349998 16317600 35601900 1030800.0 22402300 5514000
2023-12-05 9.830000 34.750000 36.540001 72.849998 35.299999 9.830000 34.750000 36.540001 72.849998 35.299999 ... 9.740000 34.919998 36.419998 73.230003 34.770000 18909200 43234200 2456100.0 25179500 5143900
2023-12-06 9.650000 33.500000 36.310001 72.370003 35.389999 9.650000 33.500000 36.310001 72.370003 35.389999 ... 9.850000 34.549999 36.540001 73.500000 35.430000 16421900 64179200 968900.0 20503900 5700700
2023-12-07 9.730000 33.419998 35.970001 72.599998 35.360001 9.730000 33.419998 35.970001 72.599998 35.360001 ... 9.670000 33.650002 36.310001 72.690002 35.480000 11717000 64723900 1352600.0 17535800 4183000
2023-12-08 9.770000 34.130001 36.009998 72.760002 34.750000 9.770000 34.130001 36.009998 72.760002 34.750000 ... 9.730000 33.709999 35.970001 73.099998 35.360001 6618700 38321500 598900.0 9507900 1991000

1241 rows × 30 columns

4.2 Manipulate and Organize Data¶

In [6]:
#select Ajusted Close Prices
Adj_Close_df = df['Adj Close']
Adj_Close_df
Out[6]:
ITSA4.SA PETR4.SA TAEE11.SA VALE3.SA WEGE3.SA
Date
2018-12-10 7.291458 8.071823 13.925715 34.242962 8.025093
2018-12-11 7.316282 8.020169 14.294817 34.515808 8.113692
2018-12-12 7.390743 8.023611 14.745242 34.358921 8.067065
2018-12-13 7.508647 8.044274 14.832824 34.515808 8.099705
2018-12-14 7.558293 7.937523 15.089318 34.706806 8.090384
... ... ... ... ... ...
2023-12-04 9.740000 34.910000 36.419998 73.529999 34.759998
2023-12-05 9.830000 34.750000 36.540001 72.849998 35.299999
2023-12-06 9.650000 33.500000 36.310001 72.370003 35.389999
2023-12-07 9.730000 33.419998 35.970001 72.599998 35.360001
2023-12-08 9.770000 34.130001 36.009998 72.760002 34.750000

1241 rows × 5 columns

In [7]:
volume_df = df['Volume']
volume_df
Out[7]:
ITSA4.SA PETR4.SA TAEE11.SA VALE3.SA WEGE3.SA
Date
2018-12-10 26640254 73871100 1447800.0 14331300 4977400
2018-12-11 35347603 64017600 1776700.0 19443900 6163200
2018-12-12 29719052 63296400 3483500.0 15808900 8927800
2018-12-13 19235305 61946100 2280200.0 15149000 7142600
2018-12-14 18840712 45231700 1890800.0 16910700 7909000
... ... ... ... ... ...
2023-12-04 16317600 35601900 1030800.0 22402300 5514000
2023-12-05 18909200 43234200 2456100.0 25179500 5143900
2023-12-06 16421900 64179200 968900.0 20503900 5700700
2023-12-07 11717000 64723900 1352600.0 17535800 4183000
2023-12-08 6618700 38321500 598900.0 9507900 1991000

1241 rows × 5 columns

In [8]:
#calculate percentage daily return
p_change_df = Adj_Close_df.pct_change() * 100
p_change_df.replace(np.nan, 0, inplace = True)
p_change_df	
Out[8]:
ITSA4.SA PETR4.SA TAEE11.SA VALE3.SA WEGE3.SA
Date
2018-12-10 0.000000 0.000000 0.000000 0.000000 0.000000
2018-12-11 0.340455 -0.639928 2.650503 0.796795 1.104027
2018-12-12 1.017750 0.042914 3.150968 -0.454537 -0.574671
2018-12-13 1.595295 0.257531 0.593965 0.456612 0.404602
2018-12-14 0.661177 -1.327049 1.729236 0.553364 -0.115081
... ... ... ... ... ...
2023-12-04 -0.102566 -2.130637 -0.191843 -2.246746 -0.057506
2023-12-05 0.924026 -0.458321 0.329497 -0.924793 1.553512
2023-12-06 -1.831132 -3.597122 -0.629446 -0.658882 0.254958
2023-12-07 0.829015 -0.238811 -0.936382 0.317805 -0.084766
2023-12-08 0.411109 2.124485 0.111196 0.220391 -1.725115

1241 rows × 5 columns

In [9]:
# Function to scale stock prices based on their initial starting price
# The objective of this function is to set all prices to start at a value of 1 
def price_scaling(raw_prices_df):
    scaled_prices_df = raw_prices_df.copy()
    for i in raw_prices_df.columns[0:]:
          scaled_prices_df[i] = raw_prices_df[i]/raw_prices_df[i][0]
    return scaled_prices_df
In [10]:
scaling_df = price_scaling(Adj_Close_df)
scaling_df
Out[10]:
ITSA4.SA PETR4.SA TAEE11.SA VALE3.SA WEGE3.SA
Date
2018-12-10 1.000000 1.000000 1.000000 1.000000 1.000000
2018-12-11 1.003405 0.993601 1.026505 1.007968 1.011040
2018-12-12 1.013617 0.994027 1.058850 1.003386 1.005230
2018-12-13 1.029787 0.996587 1.065139 1.007968 1.009297
2018-12-14 1.036596 0.983362 1.083558 1.013546 1.008136
... ... ... ... ... ...
2023-12-04 1.335810 4.324921 2.615305 2.147303 4.331414
2023-12-05 1.348153 4.305099 2.623923 2.127444 4.398703
2023-12-06 1.323466 4.150240 2.607407 2.113427 4.409918
2023-12-07 1.334438 4.140328 2.582991 2.120144 4.406180
2023-12-08 1.339924 4.228289 2.585863 2.124816 4.330168

1241 rows × 5 columns

In [11]:
Adj_Close_df.describe().round(2)
Out[11]:
ITSA4.SA PETR4.SA TAEE11.SA VALE3.SA WEGE3.SA
count 1241.00 1241.00 1240.00 1241.00 1241.00
mean 7.72 14.25 27.32 58.17 27.77
std 0.82 7.30 7.52 19.41 10.72
min 5.12 4.16 13.93 23.88 7.85
25% 7.17 9.33 19.94 36.19 18.48
50% 7.72 10.85 30.10 64.16 31.87
75% 8.28 18.41 34.21 73.70 36.34
max 9.83 37.09 38.88 92.17 44.15
In [12]:
p_change_df.describe().round(2)
Out[12]:
ITSA4.SA PETR4.SA TAEE11.SA VALE3.SA WEGE3.SA
count 1241.00 1241.00 1241.00 1241.00 1241.00
mean 0.04 0.16 0.08 0.09 0.15
std 1.85 2.84 1.28 2.53 2.40
min -10.87 -29.70 -7.04 -24.56 -20.62
25% -1.04 -1.13 -0.62 -1.22 -1.07
50% 0.00 0.14 0.04 0.02 0.04
75% 1.08 1.55 0.75 1.27 1.35
max 10.03 22.22 6.29 21.36 13.89
In [13]:
scaling_df.describe().round(2)
Out[13]:
ITSA4.SA PETR4.SA TAEE11.SA VALE3.SA WEGE3.SA
count 1241.00 1241.00 1240.00 1241.00 1241.00
mean 1.06 1.77 1.96 1.70 3.46
std 0.11 0.90 0.54 0.57 1.34
min 0.70 0.51 1.00 0.70 0.98
25% 0.98 1.16 1.43 1.06 2.30
50% 1.06 1.34 2.16 1.87 3.97
75% 1.13 2.28 2.46 2.15 4.53
max 1.35 4.60 2.79 2.69 5.50

4.3 Define a fucntion and plot Data¶

In [14]:
# Define a function using Plotly Express
def plotly_data(df, title):  
    
    # Create figure
    fig = go.Figure()
   
    # Set title
    fig.update_layout(title_text = title) 
    
    # For loop that plots all stock prices in the pandas dataframe df
    
    for i in df.columns[0:]:
        # Add range slider
        #fig.update_layout(xaxis=dict(rangeselector = dict(buttons=list([dict(count=1, label="1m", step="month", stepmode="backward"), dict(count=6, label="6m", step="month", stepmode="backward"), dict(count=1, label="YTD", step="year", stepmode="todate"), dict(count=1, label="1y", step="year", stepmode="backward"), dict(step="all")])), rangeslider=dict( visible=True), type="date"))
        # Add line graph
        fig.add_scatter(x = df.index, y = df[i], name = i)
        # Update Layout
        fig.update_layout({'plot_bgcolor': "white"})
    
    fig.show()
    
# Define a function using Plotly Express, changes axis y to logarithm scale
def log_plotly_data(df, title):  
    
    # Create figure
    fig = go.Figure()
   
    # Set title
    fig.update_layout(title_text = title) 
    
    # For loop that plots all stock prices in the pandas dataframe df
    
    for i in df.columns[0:]:
        # Add range slider
        #fig.update_layout(xaxis=dict(rangeselector = dict(buttons=list([dict(count=1, label="1m", step="month", stepmode="backward"), dict(count=6, label="6m", step="month", stepmode="backward"), dict(count=1, label="YTD", step="year", stepmode="todate"), dict(count=1, label="1y", step="year", stepmode="backward"), dict(step="all")])), rangeslider=dict( visible=True), type="date"))
        # Add line graph
        fig.add_scatter(x = df.index, y = df[i], name = i)
        # Update Layout
        fig.update_layout({'plot_bgcolor': "white"})
    
    #changes y to logarithm scale
    fig.update_yaxes(type="log")
    fig.show()    
In [15]:
plotly_data(Adj_Close_df, 'Closing Prices [R$]')
In [16]:
log_plotly_data(Adj_Close_df, 'Closing Prices [R$]')
In [17]:
plotly_data(scaling_df, "Scaling Closing Prices")
In [18]:
log_plotly_data(scaling_df, "Scaling Closing Prices")
In [19]:
plotly_data(p_change_df, "Percentage Daily Returns [%]")
In [20]:
plotly_data(volume_df, "Trade Volume")